The purpose of this notebook is to analyze item-level data collected for study SGC-3: The Insight Hypothesis

#IMPORT DATA from fall and spring files
# Item-level (per-question) response records for study SGC-3.
# NOTE(review): paths are relative — assumes the working directory is the
# project root; confirm when knitting from another location.
fall_items <- "data/fall_sgc3a_blocks.csv"
spring_items <- "data/spring_sgc3a_blocks.csv"
df_fall <- read.csv(fall_items)
df_spring <- read.csv(spring_items)

#Create combined data frame
# rbind matches columns by name; both CSVs are assumed to share a schema.
df_items <- rbind(df_fall, df_spring)

#Create extra fields
df_items$time_sec <- df_items$rt / 1000 #item time in seconds (rt recorded in ms)

#Create answer-consistency column (values: Both, Ortho, Tri, Neither, BLANK)
# case_when evaluates conditions in order, so the blank-answer check comes
# first (it previously had to override the others). The original pattern —
# initialize to numeric 0 and replace() with strings — relied on silent
# type coercion and left a "0" for any row missed by every condition;
# unmatched rows are now NA, which is easier to spot.
df_items <- df_items %>% mutate(consistency = case_when(
  answer == ""                     ~ "BLANK",   #no response given
  correct == 1 & orth_correct == 1 ~ "Both",    #correct under both readings
  correct == 0 & orth_correct == 1 ~ "Ortho",   #orthogonal reading only
  correct == 1 & orth_correct == 0 ~ "Tri",     #triangular reading only
  correct == 0 & orth_correct == 0 ~ "Neither", #neither reading
  TRUE                             ~ NA_character_
))

#Create TOTAL column for future sorting (q 16 is the free-response item)
df_totals <- df_items %>% filter(q != 16) %>% group_by(subject) %>%
  summarise(TOTAL = sum(correct))
df_items <- left_join(df_items, df_totals, by = "subject")

# #Code incorrect responses (not triangular or ortho correct)
# df_items$response <- df_items$orth_correct + df_items$correct #items not correct in tri or orth interpretation
# df_items$response <- dplyr::recode( df_items$response, `1`= "right", `2`="both", `0`="wrong" )
# df_items$incorrect <- dplyr::recode( df_items$response, "right"=0, "both"=0, "wrong"=1 )

#Create factors
# Convert all categorical identifier columns in one pass, and add a numeric
# placeholder column that will later hold hand-coded error types.
df_items <- df_items %>%
  mutate(
    across(
      c(subject, session, term, condition, consistency,
        explicit, impasse, axis, q, question),
      as.factor
    ),
    error = 0 #temporary holder for error codes
  )

#Change values of column names for later reshaping
# df_items <- rename(df_items, rs_incorrect = incorrect)
df_items <- df_items %>%
  rename(rs_tri = correct, rs_ortho = orth_correct)

#Separate free response from (main) multiple choice blocks
df_freeResponse <- df_items %>% filter(q == 16)
df_items <- df_items %>% filter(q != 16)

#Cleanup temporary dataframes
rm(df_fall, df_spring, df_totals)

#Set custom colors used across the response-type plots
# (use `<-` for assignment per R style convention; `=` works at top level
# but is reserved for argument binding in idiomatic code)
lgrey   <- "#cacec8"
lgreen  <- "#c6edbb"
lyellow <- "#FBEEBB"
lblue   <- "#BCD9EE"

TIMECOURSE OF ACCURACY

I have the intuition that (for an individual participant) accurate and inaccurate responses are not randomly distributed across the timecourse of the study. There should be a substantial learning effect, such that responses are incorrect until the individual LEARNS how the coordinate system works — and then responses will be correct.

#FILTER question-response data
# q 6 and 9 are excluded because the triangular and orthogonal answers
# coincide there; q 16 is the free-response item.
l_timecourse <- df_items %>% filter(!q %in% c(6,9,16)) %>%
  select(subject, impasse, q, consistency, TOTAL)

#ORDER dataframe by subject total amount
# Use bare column names inside mutate() — referencing l_timecourse$... there
# bypasses the data mask and breaks silently if the pipe input changes.
l_timecourse <- l_timecourse %>%
  mutate(subject = fct_reorder(subject, TOTAL, min))

#WIDE dataframe (for manual inspection)
# pivot_wider() replaces the superseded spread().
w_timecourse <- l_timecourse %>%
  pivot_wider(names_from = q, values_from = consistency)

#VISUALIZE
facetlabels <- c("1"="control", "2"="impasse")
# NOTE: the original passed `palette = "jco"` inside aes(); palette is not a
# ggplot aesthetic and was silently ignored, so it is dropped here (likewise
# the invalid `facet=` argument to labs() on p2).
p1 <- ggplot(l_timecourse %>% filter(impasse == 1), aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse, labeller = labeller(impasse = facetlabels)) +
  geom_raster() + scale_fill_manual(values = c("#cacec8", "#FBEEBB", "#BCD9EE", "#c6edbb")) +
  labs(x = "Question Sequence", fill = "Response Type")

p2 <- ggplot(l_timecourse %>% filter(impasse == 2), aes(x = q, y = subject, fill = consistency)) +
  facet_grid(~ impasse, labeller = labeller(impasse = facetlabels)) +
  geom_raster() + scale_fill_manual(values = c("#cacec8", "#FBEEBB", "#BCD9EE", "#c6edbb")) +
  labs(x = "Question Sequence", fill = "Response Type")

figure <- ggarrange(p1, p2, ncol = 1)
annotate_figure(figure,
        top = text_grob("Timecourse of Response Type by Participant and Condition", color = "black", face = "bold", size = 14))


TODO: This plot of Response Type by Question (in sequence) for each participant demonstrates that … .

#SUMMARIZE items response-type by question (which relation is being asked)
# Count blank/neither responses directly from the consistency factor rather
# than mutating df_items with a throwaway `dummy` column of 1s (the original
# global side effect is removed; `dummy` was used nowhere else).
df_questions <- df_items %>%
  filter(!q %in% c(6,9)) %>%
  group_by(q, condition) %>%
  summarize(n = n(),
            rs_tri = sum(rs_tri),
            rs_ortho = sum(rs_ortho),
            rs_blank = sum(consistency == "BLANK"),
            rs_neither = sum(consistency == "Neither"),
            question = question[1]) #question text is constant within q

#convert to LONG dataframe
l_questions <- df_questions %>%
  pivot_longer(cols = starts_with("rs_"), names_to = "answer_type", values_to = "count")

#update factor level order in order to maximize comparison in plot
l_questions$answer_type <- factor(l_questions$answer_type)
l_questions$answer_type <- relevel(l_questions$answer_type, "rs_neither")

#TOTAL ACROSS CONDITIONS
ggplot(data = l_questions, aes(x = q, y = count, fill = answer_type)) +
  geom_bar(position = "stack", stat = "identity") +
  facet_grid(~condition) +
  scale_fill_manual(values = c(lyellow, lgrey, lblue, lgreen)) +
  labs(title = "Answer Type by Question and Condition", x = "Question Sequence", y = "Count(n)", fill = "Response Type")


TODO: This plot of Response Type by Question (in sequence) and Condition demonstrates that… .


TODO: Test this with a linear mixed effects model. .

WIP ALTERNATIVE ANSWERS

Q1: Starts

What are the alternative answers subjects give for the first question?

#Create a contingency table with marginal sums for all answers across both conditions on Q1
df_q1 <- df_items %>% filter(q == 1)
# Named `answer_tab` rather than `t`, which shadows base::t() (transpose).
answer_tab <- table(df_q1$answer, df_q1$condition)
answer_tab <- answer_tab[order(-rowSums(answer_tab)), ] #reorder by marginal frequency
addmargins(answer_tab)
##      
##       111 121 Sum
##   A    50   1  51
##   F    10  19  29
##         0  18  18
##   O     0  14  14
##   CF    1   6   7
##   AI    0   3   3
##   AF    1   0   1
##   AO    0   1   1
##   C     0   1   1
##   I     0   1   1
##   Sum  62  64 126

A:: Orthogonal or Satisficing?

# Display the Q1 stimulus graphic for answer "A"
img_path <- "images/q1_A.png"
include_graphics(img_path)


(control: 50, impasse: 1) The most common erroneous response is the “A”. In the control condition, this is the orthogonal distractor. However, in the impasse condition, the ‘A’ is not orthogonal to the directed start time, but the closest nearly orthogonal response. Potentially revealing a satisficing strategy? Or visual error in seeing it as an orthogonal intersect?

#A 
# Subjects who answered "A" on Q1; plot their full response-type timecourse.
subjects <- df_items %>% filter(q==1 & answer == "A") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# In control (111) "A" is the true orthogonal distractor; in impasse (121)
# it is the nearest near-orthogonal point (+0:30), coded as a visual error.
df_items[(df_items$q==1) & (df_items$condition ==111) & (df_items$answer == "A"),]$error = "orthogonal"
df_items[(df_items$q==1) & (df_items$condition ==121) & (df_items$answer == "A"),]$error = "visualerror-right-30"

F :: Triangular

# Display the Q1 stimulus graphic for answer "F"
img_path <- "images/q1_F.png"
include_graphics(img_path)


(control: 10, impasse: 19) The second most common response was the correct (triangular) response “F”. In the impasse condition, there are no orthogonal-distractors. In the control condition, the A datapoint acts as an orthogonal distractor.

#F 
# Subjects who gave the correct (triangular) "F" answer on Q1.
subjects <- df_items %>% filter(q==1 & answer == "F") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# "F" is the correct response, so the error code is deliberately left blank.
df_items[(df_items$q==1) & (df_items$condition ==111) & (df_items$answer == "F"),]$error = ""
df_items[(df_items$q==1) & (df_items$condition ==121) & (df_items$answer == "F"),]$error = ""

No Response

# Display the Q1 stimulus graphic for the no-response (blank) case
img_path <- "images/q1_blank.png"
include_graphics(img_path)


(control: 0, impasse: 18) The third most common response was (blank) no response, though this only occurred in the impasse condition; likely when subjects could not locate an orthogonal distractor, and chose not to revert to an alternative strategy.

#BLANK 
# Subjects who left Q1 unanswered.
subjects <- df_items %>% filter(q==1 & answer == "") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# BUG FIX: this section codes the blank (no-response) answers, but the
# original assigned the code to answer == "AF" — a copy-paste slip.
df_items[(df_items$q==1) & (df_items$answer == ""),]$error = "fixed-no-response" #graphical fixedness, no response

O :: Visual Error or Satisficing?

# Display the Q1 stimulus graphic for answer "O"
img_path <- "images/q1_O.png"
include_graphics(img_path)


(control: 0, impasse: 14) The fourth most common response was an O, though this only occurred in the impasse condition. Similar to the 1 person who entered an 'A', the 'O' response indicates either a visual-alignment error (thinking the O is an orthogonal intersector) OR a satisficing strategy to select the closest non-orthogonal data point. However, A is closer in both y-distance (height) and x-distance. A has an orthogonal intersection of +0:30m, while O has an orthogonal intersection of -0:30m. However, because O has a greater duration (higher in the graph) the visual angle between 11am and O is less than that of A, making it more likely to be confused as a true orthogonal intersection. In this sense, it is sensical that event O has t[4,2] responses and A only t[1,2] responses.

TODO: Can mousepath be used to differentiate between a visual error (thinking O was orthogonal) and a satisficing decision (closest point to the orthogonal line)? If the subject traces up from the start time, A is the first data point [impasse condition] they would come across. My intuition is that folks who trace the orthogonal would select A, and folks who don't (i.e. are just inspecting the graph and not using the mouse) would be more likely to select O.

#O 
# Subjects who answered "O" on Q1.
subjects <- df_items %>% filter(q==1 & answer == "O") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# BUG FIX: this section codes the "O" answers, but the original assigned the
# code to answer == "AF" — a copy-paste slip.
df_items[(df_items$q==1) & (df_items$answer == "O"),]$error = "visualerror-left-30" 

CF :: Lines Connect

# Display the Q1 stimulus graphic for answer "CF"
img_path <- "images/q1_CF.png"
include_graphics(img_path)


(control: 1, impasse: 6) The fifth most common response was the two-item entry C and F, both of which are diagonal intersects with the start time. However, this response violates the encoding convention that a given referent (i.e start time) has a single spatial encoding in the representation. This suggests a partial understanding of something diagonal about the coordinate system; the reader is attending to the gridlines and using them to intersect data and the x-axis. However, they’ve not decoded the spatial referent of start time vs. end time.

TODO: I would suspect this acts as a transition strategy, or an indication of partial understanding that may eventually lead to full understanding, and unlikely to revert to orthogonal. MODEL THIS. .

#CF 
# Subjects who gave the two-item "CF" answer on Q1.
subjects <- df_items %>% filter(q==1 & answer == "CF") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# BUG FIX: this section codes the "CF" answers, but the original assigned the
# code to answer == "AF" — a copy-paste slip.
df_items[(df_items$q==1) & (df_items$answer == "CF"),]$error = "lines-connect" 

AI :: Satisficing or Visual Error

# Display the Q1 stimulus graphic for answer "AI"
img_path <- "images/q1_AI.png"
include_graphics(img_path)


(control: 0, impasse: 3) The sixth most common response was the two-item entry A, I, with responses only in the impasse condition. Both A and I are near-orthogonal distractors of 11AM, with +0:30m offset, suggesting this is either a satisficing or visual-error case. We do not see this response in the control condition, most likely because there is a true orthogonal intersect.

#AI
# Subjects who gave the two-item "AI" answer on Q1.
subjects <- df_items %>% filter(q==1 & answer == "AI") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# Both A and I lie +0:30 off the orthogonal line (near-orthogonal distractors).
df_items[(df_items$q==1) & (df_items$answer == "AI"),]$error = "visualerror-right-2" 

AF :: Clerical Error

# Display the Q1 stimulus graphic for answer "AF"
img_path <- "images/q1_AF.png"
include_graphics(img_path)


(control: 1, impasse: 0) One subject in the control condition selected both the triangular and orthogonal-consistent responses. Considering this was only a single subject, and the multiple choice checkboxes were directly underneath each other in the response list, it is plausible that this was a clerical error, whereby the subject selected one response, and then the other, without unchecking the other box.

TODO: Can we verify this from the mouseclick stream data?

#AF
# The single control-condition subject who selected both the triangular (F)
# and orthogonal (A) responses — hypothesized clerical error.
subjects <- df_items %>% filter(q==1 & answer == "AF") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  # NOTE(review): this color vector (lyellow,lgreen,lblue,lgrey) is ordered
  # differently from the other timecourse plots (lgrey,lyellow,lblue,lgreen);
  # confirm whether the color-to-response-type mapping is intentional here.
  geom_raster() + scale_fill_manual(values = c(lyellow,lgreen,lblue,lgrey)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
df_items[(df_items$q==1) & (df_items$answer == "AF"),]$error = "clerical" 

AO :: Special Case? Visual Error?

# Display the Q1 stimulus graphic for answer "AO"
img_path <- "images/q1_AO.png"
include_graphics(img_path)


(control: 0, impasse: 1) One subject in the impasse condition selected both events A and O, each near-orthogonal distractors, but in opposite directions. This sits as a hybrid between ‘lines connect’ in that they are both off the orthogonal intersection, and visual error. However, since the options are both off the orthogonal line, it seems unlikely the individual saw them both as accurate orthogonal intersects.

TODO: What is happening here? Can we recreate from the mouseclick data? Another clerical error? or something more interesting?

#AO
# The single impasse-condition subject who selected both near-orthogonal
# distractors (A and O), which lie on opposite sides of the orthogonal line.
subjects <- df_items %>% filter(q==1 & answer == "AO") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  # NOTE(review): color vector ordering differs from the other timecourse
  # plots — confirm the mapping is intentional.
  geom_raster() + scale_fill_manual(values = c(lyellow,lblue,lgreen,lgrey)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
df_items[(df_items$q==1) & (df_items$answer == "AO"),]$error = "visualerror-right-left" 

**How did this subject produce a (possibly accidental?) triangular answer on Q5?

C :: Backwards

# Display the Q1 stimulus graphic for answer "C"
img_path <- "images/q1_C.png"
include_graphics(img_path)


(control: 0, impasse: 1) One subject in the impasse condition selected event C, which is a diagonal intersect with the given start time, but ends, rather than starts, at 11.

I’m surprised to not see more of these type of responses, actually!

#C
# The single impasse-condition subject who selected event C (a diagonal
# intersect that ENDS, rather than starts, at the target time).
subjects <- df_items %>% filter(q==1 & answer == "C") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
# BUG FIX: this section codes the "C" answers, but the original assigned the
# code to answer == "AO" — a copy-paste slip from the AO section.
df_items[(df_items$q==1) & (df_items$answer == "C"),]$error = "backwards" 

**How did this subject produce (possibly accidental?) triangular answers on Q4, Q10, and Q14?

I :: Visual Error?

# Display the Q1 stimulus graphic for answer "I"
img_path <- "images/q1_I.png"
include_graphics(img_path)


(control: 0, impasse: 1) One subject in the impasse condition selected event I, which is a near-orthogonal distractor. What is interesting about this answer is that the individual does NOT also select A, which falls along the same orthogonal intersect. This might suggest a visual error: judging I (but not A) as orthogonal to 11 because it is so much higher in the graph.

I’m surprised to not see more of these type of responses, actually!

#I
# BUG FIX: this section analyzes answer "I" (the surrounding prose discusses
# the single impasse-condition "I" response), but the original filtered and
# coded answer == "O" — a copy-paste slip from the O section above.
subjects <- df_items %>% filter(q==1 & answer == "I") %>%  select(subject)
ggplot(l_timecourse %>% filter(subject %in% subjects$subject), aes(x = q, y = subject, fill = consistency)) + 
  geom_raster() + scale_fill_manual(values = c(lgrey,lyellow,lblue,lgreen)) + facet_grid(~ impasse)

#CODE ERROR TYPES from manual inference  
df_items[(df_items$q==1) & (df_items$answer == "I"),]$error = "visualerror-right" 

RESPONSE LATENCY

1.1 Distribution of Response Latency

What is the distribution of response times per question?

#SUMMARIZE response time 
# favstats() (mosaic package) reports min/Q1/median/Q3/max/mean/sd/n/missing.
time.stats <- favstats(~time_sec, data = df_items)
time.stats
##    min       Q1  median       Q3     max     mean       sd    n missing
##  1.264 13.80425 24.8685 46.07975 336.033 35.46849 33.11745 1890       0

Response time per question (n=1890) ranged from 1.2s to 336s (5.5 minutes), with a M=35.5s, SD = 33.12s.

#VISUALIZE distribution of response times per question
# Density histogram with the overall mean (blue line) and a fitted
# distribution overlay (gf_fitdistr).
gf_dhistogram(~time_sec, binwidth = 5, data = df_items) %>%
gf_vline(xintercept = ~time.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>% 
gf_labs(title ="Distribution of response latency (per item)")

#VERIFY normality of resulting data with qqPlot
qqPlot(~time_sec, data = df_items)

However, the distribution is clearly not normal, so it is appropriate to transform the response latency variable.

1.2 TRANSFORM Response Latency

#APPLY a log transform
# Response latency is right-skewed, so analyze log(seconds) instead.
# NOTE(review): log() fails for time_sec == 0; the observed minimum is 1.264s,
# so no offset is needed here — confirm if new data could include zeros.
df_items$log_time <- log(df_items$time_sec)
log_time.stats <- favstats(~log_time, data = df_items)
log_time.stats

gf_dhistogram(~log_time, data = df_items) %>%
gf_vline(xintercept = ~log_time.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>% 
gf_labs(title ="Distribution of response latency (LOGT) on first question", x="Log-transform (seconds)")

#VERIFY normality of resulting data with qqPlot
qqPlot(~log_time, data = df_items)


TODO: Is this an appropriate outcome? What about the hump at the start of the plot?.

FIRST QUESTION

1.1 Response Latency (First Question)

What is the distribution of response times on the very first question?

# Restrict to the first question and summarize its latency distribution.
df_q1 <- df_items %>% filter (q==1)
q1.stats <- favstats(~time_sec, data = df_q1)
q1.stats
##    min       Q1 median    Q3     max     mean       sd   n missing
##  7.223 26.61575 39.315 52.17 161.362 44.52736 26.22314 126       0



Response time (in seconds) on the first question (n=126) ranged from 7.2 to 161 seconds, with a M = 44.5, SD = 26.2. The distribution is clearly not-normal.

# Density histogram of first-question latency with mean line and fit overlay.
gf_dhistogram(~ time_sec, data = df_q1) %>%
gf_vline(xintercept = ~q1.stats$mean, color = "blue") %>%
gf_fitdistr(color="blue") %>% # gf_dist("norm", color="blue", params=list(q1.stats$mean, q1.stats$sd), xlim(0,50)) %>% 
gf_labs(title ="Distribution of response latency on first question")

#VERIFY normality of resulting data with qqPlot
qqPlot(~time_sec, data = df_q1)

1.2 Response Latency by Condition (First Question)

Do response times differ by condition? TODO: EXPLORE how to model reaction time. https://lindeloev.github.io/shiny-rt/

# First-question latency summarized separately per condition
# (111 = control, 121 = impasse). Reuses/overwrites `time.stats`.
time.stats <- favstats(time_sec ~ condition, data = df_q1)
time.stats
##   condition    min       Q1  median     Q3     max     mean       sd  n missing
## 1       111 11.080 22.94175 35.1385 43.280 112.983 37.19852 20.51576 62       0
## 2       121  7.223 32.42225 46.4050 63.909 161.362 51.62717 29.20256 64       0
# Faceted latency histograms by condition, with per-condition mean lines.
gf_dhistogram(~time_sec, fill = ~condition, data = df_q1) %>% 
  gf_facet_grid(condition~.) %>% 
  gf_vline(xintercept = ~mean, color = "red", data = time.stats)

The average total response latency (entire 15 question block) for the control condition was slightly higher (M = 9.54s, SD = 3.09) than the impasse-scaffold condition (M = 8.9, 2.69s)

# Same per-condition summary on the log-transformed latency.
time.stats <- favstats(log_time ~ condition, data = df_q1)
time.stats
##   condition      min       Q1   median       Q3      max     mean        sd  n
## 1       111 2.405142 3.132936 3.559073 3.767658 4.727237 3.483336 0.5181027 62
## 2       121 1.977270 3.477854 3.837388 4.157262 5.083650 3.797496 0.5625236 64
##   missing
## 1       0
## 2       0
# Faceted log-latency histograms by condition, plus a normality check.
gf_dhistogram(~log_time, fill = ~condition, data = df_q1) %>% 
  gf_facet_grid(condition~.) %>% 
  gf_vline(xintercept = ~mean, color = "red", data = time.stats)

qqPlot(~log_time, data = df_q1)

## [1]  65 113
#LINEAR MODEL with dependent variable
# Predict raw first-question latency from condition (baseline = 111 control).
m1 <- lm(time_sec ~ condition, data = df_q1)
summary(m1)
## 
## Call:
## lm(formula = time_sec ~ condition, data = df_q1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.404 -17.456  -4.720   9.201 109.735 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    37.199      3.214   11.57  < 2e-16 ***
## condition121   14.429      4.509    3.20  0.00175 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.3 on 124 degrees of freedom
## Multiple R-squared:  0.07627,    Adjusted R-squared:  0.06882 
## F-statistic: 10.24 on 1 and 124 DF,  p-value: 0.001746
#LINEAR MODEL with log-transformed dependent variable
# Same model on log latency; better residual behavior given the skew.
mT <- lm(log_time ~ condition, data = df_q1)
summary(mT)
## 
## Call:
## lm(formula = log_time ~ condition, data = df_q1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8202 -0.3504  0.0503  0.3382  1.2861 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.48334    0.06872  50.686  < 2e-16 ***
## condition121  0.31416    0.09643   3.258  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5411 on 124 degrees of freedom
## Multiple R-squared:  0.07885,    Adjusted R-squared:  0.07142 
## F-statistic: 10.61 on 1 and 124 DF,  p-value: 0.001448

The difference in average response time (for the first question) IS statistically significant F(1,124) = 10.61 , p < 0.05, with a linear model predicting response time from condition explaining around 8% of variance. (The log-transform model reduces residual standard error from 25 to 0.5.)

Question Types

TODO consider item type

VALIDATION

Verify item totals

First, verify (sanity check!) that the flatten.js data wrangling scripts were correct by generating participant totals directly from item-level data, and compare with participant level file. Comparison of summarized data from item_level files and participant level file show that the question accuracy totals for each participant are the same.

#SUMMARIZE FROM ITEMS FILES
# Rebuild per-participant totals from item-level data to cross-check the
# flatten.js wrangling output against the participant-level files.
df_item_sanity <- df_items %>% filter(!q == 16) %>% group_by(subject) %>% summarise(
  tri_correct = sum(rs_tri),
  orth_correct = sum(rs_ortho),
  total = tri_correct + orth_correct) %>% 
  mutate( subject = factor(subject)) %>% 
  arrange(desc(subject))

#SUMMARIZE FROM PARTICIPANT FILES
fall_participants <- "data/fall_sgc3a_participants.csv"
spring_participants <- "data/spring_sgc3a_participants.csv"
df_fall_p <- read.csv(fall_participants)
df_spring_p <- read.csv(spring_participants)
df_participants_sanity <- rbind(df_fall_p, df_spring_p) %>% 
  mutate(subject = factor(subject), 
         tri_correct = triangular_score, 
         orth_correct = orthogonal_score,
         total = tri_correct + orth_correct
         ) %>% 
  select(subject, tri_correct, orth_correct, total) %>% 
  arrange(desc(subject))

#CHECK EQUALITY
# NOTE(review): dplyr::all_equal() is deprecated in recent dplyr releases;
# consider all.equal() or dplyr::anti_join() when upgrading.
all_equal(df_participants_sanity,df_item_sanity)
## [1] TRUE
#REMOVE TEMPORARY DFS
# BUG FIX: df_fall and df_spring were already removed in the setup section's
# cleanup; listing them again here raised the "object not found" warnings
# shown in the rendered output. Remove only objects still present.
rm(fall_participants, spring_participants, df_participants_sanity, df_fall_p, df_spring_p)

df_items_by_participant <- df_item_sanity
rm(df_item_sanity)
## Warning in rm(df_item_sanity, df_fall, df_spring): object 'df_fall' not found
## Warning in rm(df_item_sanity, df_fall, df_spring): object 'df_spring' not found

Verify session totals

How many subjects were run in each data collection session?

#MANUALLY INSPECT SESSIONS
# Count distinct subjects per data-collection session.
df_items %>% group_by(session) %>% 
  summarize(n=length(unique(subject)))
## # A tibble: 22 × 2
##    session     n
##    <fct>   <int>
##  1 alpha       4
##  2 bravo       7
##  3 charlie    15
##  4 delta       2
##  5 echo        9
##  6 fire        5
##  7 flower      1
##  8 foxtrot     3
##  9 golf       10
## 10 heaven      3
## # … with 12 more rows